path <- "/Users/acas/Dropbox/Post-docs/Roux_Lab/messor_analyses/popgen_analyses/clustering_PCA/admixture/MaciRef_vcfAllfRef"
dt <- read.delim("/Users/acas/Dropbox/Post-docs/Roux_Lab/messor_analyses/messor_contig_AllGeneticData.txt") %>%
    rename(Ind = sra_access_number) %>%
    mutate(lineage = case_when(mitochondrial %in% "" ~ species, TRUE ~ mitochondrial))

colsT <- c(brewer.pal(3, "Greens")[-2], brewer.pal(8, "Blues")[c(2, 5, 7)], brewer.pal(9,
    "Reds")[c(2, 3, 5, 7, 8, 9)], "#C84476", "#6350B5", "#BF88C2", "#C7D144", "#6DCEB6",
    "#CA4AC3", "#D4953D", "#7735D8")
names(colsT) <- c("mMbar1", "mMbar2", "mMebe1", "mMebe3", "mMebe2", "structor3",
    "ponticus2", "mcarthuri7", "muticus6", "structor4", "ibericus1", "aciculatus",
    "arenarius", "capitatus", "decipiens", "minor", "nodentatus", "oertzeni", "wasmanni")

pcaPlots <- function(path = path, analysis = analysis, vcf = vcfi) {
    gen <- vcfR2genind(vcfi)
    loci <- locNames(gen)

    # m <- read.delim(list.files(paste0(path,'/',analysis), pattern = 'map',
    # recursive = F, full.names = T), header = FALSE) %>% mutate(V2 =
    # str_replace(V2, ':','_')) ### to plot PCA with the same number of loci in
    # admixture analysis toRemove=which(!loci %in% m$V2) gen=ge[loc=-toRemove]

    x <- tab(gen, NA.method = "mean")
    pca = dudi.pca(x, center = TRUE, scannf = FALSE, scale = FALSE, nf = 3)

    percent = pca$eig/sum(pca$eig) * 100

    PCAdt <- data.frame(pca$li) %>%
        rownames_to_column("Ind") %>%
        left_join(filter(dt, Ind %in% indNames(gen)), by = "Ind") %>%
        mutate(Ind_h = paste0(Ind, substring(hybrid, 1, 1)))

    PCAdt <- PCAdt %>%
        group_by(species) %>%
        dplyr::summarise(across(Axis1:Axis3, mean, na.rm = TRUE)) %>%
        full_join(PCAdt, by = "species", suffix = c(".cen", ""))

    # Custom x and y labels
    axis1Lab = paste("Axis 1 (", format(round(percent[1], 1), nsmall = 1), " %)",
        sep = "")
    axis2Lab = paste("Axis 2 (", format(round(percent[2], 1), nsmall = 1), " %)",
        sep = "")
    axis3Lab = paste("Axis 3 (", format(round(percent[3], 1), nsmall = 1), " %)",
        sep = "")

    ggtheme <- function(axisA, axisB, axisAlab, axisBlab) {
        ggplot(data = PCAdt, aes_string(x = axisA, y = axisB)) + labs(x = axisAlab,
            y = axisBlab) + geom_hline(yintercept = 0) + geom_vline(xintercept = 0) +
            geom_point(aes(color = datatype_nuc, shape = caste, fill = lineage),
                size = 3, show.legend = TRUE, alpha = 0.85) + geom_text_repel(aes(label = Ind_h),
            max.overlaps = 15, size = 2, show.legend = FALSE, segment.size = 0.2) +
            scale_shape_manual(values = c(21, 24)) + scale_fill_manual(values = colsT[unique(PCAdt$lineage)]) +
            scale_colour_manual(values = c("black", "gray90")) + theme(axis.text.y = element_text(colour = "black",
            size = 12), axis.text.x = element_text(colour = "black", size = 12),
            axis.title = element_text(colour = "black", size = 12), panel.border = element_rect(colour = "black",
                fill = NA, size = 1), panel.background = element_blank(), plot.title = element_text(hjust = 0.5,
                size = 15), legend.position = "bottom") + labs(fill = "", color = "",
            shape = "") + guides(shape = guide_legend(nrow = 2), colour = guide_legend(nrow = 2),
            fill = guide_legend(nrow = 2, override.aes = list(shape = 21, alpha = 1)))
    }

    # Scatter plot axis 1 vs. 2
    PCA12 <- ggtheme("Axis1", "Axis2", axis1Lab, axis2Lab)
    PCA13 <- ggtheme("Axis1", "Axis3", axis1Lab, axis3Lab)
    PCA23 <- ggtheme("Axis2", "Axis3", axis2Lab, axis3Lab)

    PCAplot <- ggarrange(PCA12, ggarrange(PCA13, PCA23, nrow = 1, legend = "none"),
        ncol = 1, heights = c(1, 0.65), legend = "top")

    annotate_figure(PCAplot, bottom = text_grob(paste(nLoc(gen), "SNPs used"), hjust = 1,
        x = 1, size = 10))

}

Messor Queens

analysis <- "MessorQueens"
vcfi <- read.vcfR(list.files(paste0(path, "/", analysis, "/"), pattern = "_filteredThin.recode.vcf",
    recursive = F, full.names = T), verbose = FALSE)
pcaPlots(path, analysis, vcfi)

gen <- vcfR2genind(vcfi)
ind <- indNames(gen)
gen1 <- gen[-which(ind %in% c("Mtar_A7", "Mibe_A3", "Maeg_A5"))]

x <- tab(gen1, NA.method = "mean")
pca = dudi.pca(x, center = TRUE, scannf = FALSE, scale = FALSE, nf = 3)

percent = pca$eig/sum(pca$eig) * 100

PCAdt <- data.frame(pca$li) %>%
    rownames_to_column("Ind") %>%
    left_join(filter(dt, Ind %in% indNames(gen)), by = "Ind") %>%
    mutate(Ind_h = paste0(Ind, substring(hybrid, 1, 1)))

PCAdt <- PCAdt %>%
    group_by(species) %>%
    dplyr::summarise(across(Axis1:Axis3, mean, na.rm = TRUE)) %>%
    full_join(PCAdt, by = "species", suffix = c(".cen", "")) %>%
    mutate(spLabel = case_when(species %in% c("aciculatus", "capitatus", "decipiens",
        "minor", "wasmanni") ~ "other", TRUE ~ species), idLabel2 = case_when(species %in%
        c("aciculatus", "capitatus", "decipiens", "minor", "wasmanni") ~ species,
        TRUE ~ Ind_h))

# Custom x and y labels
axis1Lab = paste("Axis 1 (", format(round(percent[1], 1), nsmall = 1), " %)", sep = "")
axis2Lab = paste("Axis 2 (", format(round(percent[2], 1), nsmall = 1), " %)", sep = "")
axis3Lab = paste("Axis 3 (", format(round(percent[3], 1), nsmall = 1), " %)", sep = "")

cols <- c(brewer.pal(3, "Greens")[-2], brewer.pal(3, "Blues")[-1], brewer.pal(9,
    "Reds")[c(1, 3, 5, 7, 8, 9)], rep("grey50", 5))
names(cols) <- c("mMbar1", "mMbar2", "mMebe1", "mMebe2", "structor3", "ponticus2",
    "mcarthuri7", "muticus6", "structor4", "ibericus1", "aciculatus", "capitatus",
    "decipiens", "minor", "wasmanni")

ggtheme <- function(axisA, axisB, axisAlab, axisBlab) {
    ggplot(data = PCAdt, aes_string(x = axisA, y = axisB)) + labs(x = axisAlab, y = axisBlab) +
        geom_hline(yintercept = 0) + geom_vline(xintercept = 0) + geom_point(aes(shape = fct_inorder(spLabel),
        fill = lineage), size = 3, show.legend = TRUE, alpha = 0.8) + geom_text_repel(aes(label = idLabel2),
        max.overlaps = 15, size = 3, show.legend = FALSE, segment.size = 0.2) + scale_shape_manual(values = c(21,
        24, 22, 23)) + scale_fill_manual(values = cols) + labs(shape = "Species") +
        scale_colour_manual(values = c("black", "white")) + theme(axis.text.y = element_text(colour = "black",
        size = 12), axis.text.x = element_text(colour = "black", size = 12), axis.title = element_text(colour = "black",
        size = 12), panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.background = element_blank(), plot.title = element_text(hjust = 0.5,
            size = 15), legend.position = "bottom") + guides(fill = guide_legend(override.aes = list(shape = 21,
        alpha = 0.8)))
}

# Scatter plot axis 1 vs. 2
PCA12 <- ggtheme("Axis1", "Axis2", axis1Lab, axis2Lab)
# PCA13 <- ggtheme('Axis1', 'Axis3', axis1Lab, axis3Lab) PCA23 <-
# ggtheme('Axis2', 'Axis3', axis2Lab, axis3Lab)

# PCAplot <- ggarrange(PCA12,PCA13, #PCA23, nrow = 1, legend = 'bottom',
# common.legend = TRUE) annotate_figure(PCAplot, bottom =
# text_grob(paste(nLoc(gen1), 'SNPs used'), hjust = 1, x = 1, size = 10))
PCA12


M.barbarus system

M.barbarus + Decipiens + Capitatus

analysis <- "MbarDecCap"
vcfi <- read.vcfR(list.files(paste0(path, "/", analysis, "/"), pattern = "_filteredThin.recode.vcf",
    recursive = F, full.names = T), verbose = FALSE)
pcaPlots(path, analysis, vcfi)


M.barbarus

analysis <- "Mbar"

vcfi <- read.vcfR(list.files(paste0(path, "/", analysis, "/"), pattern = "_filteredThin.recode.vcf",
    recursive = F, full.names = T), verbose = FALSE)
pcaPlots(path, analysis, vcfi)


M.barbarus queens

analysis <- "MbarQueens"

vcfi <- read.vcfR(list.files(paste0(path, "/", analysis, "/"), pattern = "_filteredThin.recode.vcf",
    recursive = F, full.names = T), verbose = FALSE)
pcaPlots(path, analysis, vcfi)


M.barbarus from CRAM file

analysis <- "MbarCRAM"

vcfi <- read.vcfR(list.files(paste0(path, "/", analysis, "/"), pattern = "cramfilteredThin.recode.vcf",
    recursive = F, full.names = T), verbose = FALSE)
pcaPlots(path, analysis, vcfi)


M.ebeninus system

M.ebeninus + Wasmani + Minor

analysis <- "MebeWesMin"

vcfi <- read.vcfR(list.files(paste0(path, "/", analysis, "/"), pattern = "_filteredThin.recode.vcf",
    recursive = F, full.names = T), verbose = FALSE)
# pcaPlots(path, analysis, vcfi)
gen <- vcfR2genind(vcfi)
ind <- indNames(gen)
gen1 <- gen[-which(ind %in% c("SRR4292912", "SRR4292913"))]

x <- tab(gen1, NA.method = "mean")
pca = dudi.pca(x, center = TRUE, scannf = FALSE, scale = FALSE, nf = 3)

percent = pca$eig/sum(pca$eig) * 100

PCAdt <- data.frame(pca$li) %>%
    rownames_to_column("Ind") %>%
    left_join(filter(dt, Ind %in% indNames(gen)), by = "Ind") %>%
    mutate(Ind_h = paste0(Ind, substring(hybrid, 1, 1)))

PCAdt <- PCAdt %>%
    group_by(species) %>%
    dplyr::summarise(across(Axis1:Axis3, mean, na.rm = TRUE)) %>%
    full_join(PCAdt, by = "species", suffix = c(".cen", ""))

# Custom x and y labels
axis1Lab = paste("Axis 1 (", format(round(percent[1], 1), nsmall = 1), " %)", sep = "")
axis2Lab = paste("Axis 2 (", format(round(percent[2], 1), nsmall = 1), " %)", sep = "")
axis3Lab = paste("Axis 3 (", format(round(percent[3], 1), nsmall = 1), " %)", sep = "")

ggtheme <- function(axisA, axisB, axisAlab, axisBlab) {
    ggplot(data = PCAdt, aes_string(x = axisA, y = axisB)) + labs(x = axisAlab, y = axisBlab) +
        geom_hline(yintercept = 0) + geom_vline(xintercept = 0) + geom_point(aes(color = datatype_nuc,
        shape = caste, fill = lineage), size = 3, show.legend = TRUE, alpha = 0.8) +
        geom_text_repel(aes(label = Ind_h), max.overlaps = 15, size = 2, show.legend = FALSE,
            segment.size = 0.2) + scale_shape_manual(values = c(21, 24)) + scale_fill_manual(values = colsT[unique(PCAdt$lineage)]) +
        scale_colour_manual(values = c("black", "white")) + theme(axis.text.y = element_text(colour = "black",
        size = 12), axis.text.x = element_text(colour = "black", size = 12), axis.title = element_text(colour = "black",
        size = 12), panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.background = element_blank(), plot.title = element_text(hjust = 0.5,
            size = 15), legend.position = "top") + labs(fill = "", color = "", shape = "") +
        guides(shape = guide_legend(nrow = 2), colour = guide_legend(nrow = 2), fill = guide_legend(nrow = 2,
            override.aes = list(shape = 21, alpha = 0.8)))
}

# Scatter plot axis 1 vs. 2
PCA12 <- ggtheme("Axis1", "Axis2", axis1Lab, axis2Lab)
PCA13 <- ggtheme("Axis1", "Axis3", axis1Lab, axis3Lab)
PCA23 <- ggtheme("Axis2", "Axis3", axis2Lab, axis3Lab)

PCAplot <- ggarrange(PCA12, ggarrange(PCA13, PCA23, nrow = 1, legend = "none"), ncol = 1,
    heights = c(1, 0.65), legend = "top")

annotate_figure(PCAplot, bottom = text_grob(paste(nLoc(gen1), "SNPs used"), hjust = 1,
    x = 1, size = 10))


M.ebeninus

analysis <- "Mebe"

vcfi <- read.vcfR(list.files(paste0(path, "/", analysis, "/"), pattern = "_filteredThin.recode.vcf",
    recursive = F, full.names = T), verbose = FALSE)
pcaPlots(path, analysis, vcfi)


M.ebeninus queens

analysis <- "MebeQueens"

vcfi <- read.vcfR(list.files(paste0(path, "/", analysis, "/"), pattern = "_filteredThin.recode.vcf",
    recursive = F, full.names = T), verbose = FALSE)
# pcaPlots(path, analysis, vcfi)
gen <- vcfR2genind(vcfi)
ind <- indNames(gen)
gen1 <- gen[-which(ind %in% "Maeg_A5")]

x <- tab(gen1, NA.method = "mean")
pca = dudi.pca(x, center = TRUE, scannf = FALSE, scale = FALSE, nf = 3)

percent = pca$eig/sum(pca$eig) * 100

PCAdt <- data.frame(pca$li) %>%
    rownames_to_column("Ind") %>%
    left_join(filter(dt, Ind %in% indNames(gen)), by = "Ind") %>%
    mutate(Ind_h = paste0(Ind, substring(hybrid, 1, 1)))

PCAdt <- PCAdt %>%
    group_by(species) %>%
    dplyr::summarise(across(Axis1:Axis3, mean, na.rm = TRUE)) %>%
    full_join(PCAdt, by = "species", suffix = c(".cen", ""))

# Custom x and y labels
axis1Lab = paste("Axis 1 (", format(round(percent[1], 1), nsmall = 1), " %)", sep = "")
axis2Lab = paste("Axis 2 (", format(round(percent[2], 1), nsmall = 1), " %)", sep = "")
axis3Lab = paste("Axis 3 (", format(round(percent[3], 1), nsmall = 1), " %)", sep = "")

ggtheme <- function(axisA, axisB, axisAlab, axisBlab) {
    ggplot(data = PCAdt, aes_string(x = axisA, y = axisB)) + labs(x = axisAlab, y = axisBlab) +
        geom_hline(yintercept = 0) + geom_vline(xintercept = 0) + geom_point(aes(color = datatype_nuc,
        shape = caste, fill = lineage), size = 3, show.legend = TRUE, alpha = 0.8) +
        geom_text_repel(aes(label = Ind_h), max.overlaps = 15, size = 2, show.legend = FALSE,
            segment.size = 0.2) + scale_shape_manual(values = c(21, 24)) + scale_fill_manual(values = colsT[unique(PCAdt$lineage)]) +
        scale_colour_manual(values = c("black", "white")) + theme(axis.text.y = element_text(colour = "black",
        size = 12), axis.text.x = element_text(colour = "black", size = 12), axis.title = element_text(colour = "black",
        size = 12), panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.background = element_blank(), plot.title = element_text(hjust = 0.5,
            size = 15), legend.position = "top") + labs(fill = "", color = "", shape = "") +
        guides(shape = guide_legend(nrow = 2), colour = guide_legend(nrow = 2), fill = guide_legend(nrow = 2,
            override.aes = list(shape = 21, alpha = 0.8)))
}

# Scatter plot axis 1 vs. 2
PCA12 <- ggtheme("Axis1", "Axis2", axis1Lab, axis2Lab)
PCA13 <- ggtheme("Axis1", "Axis3", axis1Lab, axis3Lab)
PCA23 <- ggtheme("Axis2", "Axis3", axis2Lab, axis3Lab)

PCAplot <- ggarrange(PCA12, ggarrange(PCA13, PCA23, nrow = 1, legend = "none"), ncol = 1,
    heights = c(1, 0.65), legend = "top")

annotate_figure(PCAplot, bottom = text_grob(paste(nLoc(gen1), "SNPs used"), hjust = 1,
    x = 1, size = 10))


M.structor system

All M.structor

analysis <- "Mstr"

vcfi <- read.vcfR(list.files(paste0(path, "/", analysis, "/"), pattern = "_filteredThin.recode.vcf",
    recursive = F, full.names = T), verbose = FALSE)
pcaPlots(path, analysis, vcfi)


Mstr127 - Ibericus + Ponticus + Mcarthuri

analysis <- "Mstr127"

vcfi <- read.vcfR(list.files(paste0(path, "/", analysis, "/"), pattern = "_filteredThin.recode.vcf",
    recursive = F, full.names = T), verbose = FALSE)
pcaPlots(path, analysis, vcfi)


Mstr346 - Structor3 + Structor4 + Muticus

analysis <- "Mstr346"

vcfi <- read.vcfR(list.files(paste0(path, "/", analysis, "/"), pattern = "_filteredThin.recode.vcf",
    recursive = F, full.names = T), verbose = FALSE)
pcaPlots(path, analysis, vcfi)


M.structor queens

analysis <- "MstrQueens"

vcfi <- read.vcfR(list.files(paste0(path, "/", analysis, "/"), pattern = "_filteredThin.recode.vcf",
    recursive = F, full.names = T), verbose = FALSE)
pcaPlots(path, analysis, vcfi)